/*
  author Sylvain Bertrand <sylvain.bertrand@gmail.com>
  Licensed under the GNU GPLv2 (the Linux kernel license)
  Copyright 2012-2014
*/
#include <linux/pci.h>
#include <asm/byteorder.h>
#include <linux/delay.h>
#include <linux/cdev.h>
#include <linux/vmalloc.h>

#include <alga/alga.h>
#include <alga/rng_mng.h>
#include <alga/timing.h>
#include <uapi/alga/pixel_fmts.h>
#include <uapi/alga/amd/dce6/dce6.h>

#include "mc.h"
#include "ih.h"
#include "rlc.h"
#include "fence.h"
#include "ring.h"
#include "dmas.h"
#include "ba.h"
#include "cps.h"
#include "gpu.h"
#include "drv.h"

#include "regs.h"

#include "ba_private.h"

/* we may have to free partially the cpu_ps array */
static void cpu_ps_free_partial(struct ba_map *m, u64 n)
{
	while (n--)
		__free_page(m->cpu_ps[n]);
	kfree(m->cpu_ps);
}

/* release every cpu page backing the map, plus the page pointer array */
static void cpu_ps_free(struct ba_map *m)
{
	cpu_ps_free_partial(m, m->cpu_ps_n);
}

/* undo cpu_kernel_map: drop the vmap first, then the pages behind it */
static void cpu_kernel_unmap(struct ba_map *m)
{
	vunmap(m->cpu_addr);
	cpu_ps_free(m);
}

/*
 * Allocate the cpu pages backing the mapping (m->ptes_n gpu pages worth)
 * and vmap them into a contiguous kernel virtual range (m->cpu_addr).
 * Returns 0 on success, -BA_ERR on failure; on failure nothing is left
 * allocated.
 */
static long cpu_kernel_map(struct pci_dev *dev, struct ba_map *m)
{
	u64 cpu_p;
	u64 map_sz;

	map_sz = m->ptes_n << GPU_PAGE_SHIFT;
	/* the gpu-page-sized map must also cover whole cpu pages */
	if (map_sz & ~PAGE_MASK) {
		dev_err(&dev->dev, "ba:core_sg_kernel:map size is not aligned on CPU PAGE_SIZE\n");
		goto err;
	}
	m->cpu_ps_n = map_sz >> PAGE_SHIFT;
	/* kcalloc checks the n * size multiplication for overflow */
	m->cpu_ps = kcalloc(m->cpu_ps_n, sizeof(*m->cpu_ps), GFP_KERNEL);
	if (m->cpu_ps == NULL)
		goto err;

	for (cpu_p = 0; cpu_p < m->cpu_ps_n; ++cpu_p) {
		m->cpu_ps[cpu_p] = alloc_page(GFP_KERNEL | __GFP_ZERO
								| __GFP_NOWARN);
		if (!m->cpu_ps[cpu_p]) {
			/*
			 * pages [0, cpu_p) were allocated: free exactly those
			 * (passing cpu_p also kfrees the array when the very
			 * first allocation fails)
			 */
			cpu_ps_free_partial(m, cpu_p);
			goto err;
		}
	}

	m->cpu_addr = vmap(m->cpu_ps, m->cpu_ps_n, VM_MAP, PAGE_KERNEL);
	if (m->cpu_addr == NULL)
		goto err_free_ps;
	return 0;

err_free_ps:
	cpu_ps_free(m);
err:
	return -BA_ERR;
}

/* undo cpu_bus_map: unmap the sg list from the bus, then free the table */
static void cpu_bus_unmap(struct pci_dev *dev, struct ba_map *m)
{
	dma_unmap_sg(&dev->dev, m->sg_tbl.sgl, m->sg_tbl.nents, DMA_TO_DEVICE);
	sg_free_table(&m->sg_tbl);
}

/*
 * Build an sg table over the cpu pages and map it onto the bus so the
 * device can reach them via dma. Returns 0 on success, -BA_ERR on failure.
 */
static long cpu_bus_map(struct pci_dev *dev, struct ba_map *m)
{
	long r;

	r = sg_alloc_table_from_pages(&m->sg_tbl, &m->cpu_ps[0], m->cpu_ps_n,
				0, m->cpu_ps_n << PAGE_SHIFT, GFP_KERNEL);
	if (r != 0)
		goto err;

	/* the iommu may coalesce entries: keep the mapped count separately */
	m->sg_tbl_list_nents = dma_map_sg(&dev->dev, m->sg_tbl.sgl,
						m->sg_tbl.nents, DMA_TO_DEVICE);
	if (m->sg_tbl_list_nents == 0)
		goto err_free_sg_table;
	return 0;

err_free_sg_table:
	sg_free_table(&m->sg_tbl);
err:
	return -BA_ERR;
}

/* return the updated pte_addr */
static u64 bus_segment_map(struct pci_dev *dev, struct ba_map *m,
			u64 bus_segment_addr, u64 bus_segment_sz, u64 pte_addr)
{
	u64 gpu_ps_n;
	u64 gpu_p;
	u64 bus_addr;

	bus_addr = bus_segment_addr;
	gpu_ps_n = GPU_PAGE_IDX(bus_segment_sz);

	for (gpu_p = 0; gpu_p < gpu_ps_n; ++gpu_p) {	
		pte_mmio_regs_install(dev, pte_addr, bus_addr);
		pte_addr += PTE_SZ;
		bus_addr += GPU_PAGE_SZ;
	}
	return pte_addr;
}

/* restore the n first ptes */
static void ptes_restore_partial(struct pci_dev *dev, struct ba_map *m, u64 n)
{
	struct dev_drv_data *dd;
	u64 pte_addr;
	u64 dummy_pte;

	dd = pci_get_drvdata(dev);

	dummy_pte = dd->ba.dummy_bus_addr | PTE_VALID | PTE_SYSTEM
				| PTE_SNOOPED | PTE_READABLE | PTE_WRITEABLE;
	pte_addr = m->ptes_start;

	while (n--) {
		vram_w32(dev, lower_32_bits(dummy_pte), pte_addr);
		vram_w32(dev, upper_32_bits(dummy_pte), pte_addr);
		pte_addr += PTE_SZ;
	}
}

/* restore every pte of the mapping to the dummy page */
static void ptes_restore(struct pci_dev *dev, struct ba_map *m)
{
	ptes_restore_partial(dev, m, m->ptes_n);
}

/*
 * Undo gpu_bus_map: neutralize the ptes (unless the caller asked to skip
 * page-table updates via BA_NO_PT_UPDATE) and return the gpu address range.
 */
static void gpu_bus_unmap(struct pci_dev *dev, struct ba_map *m, u8 flgs)
{
	struct dev_drv_data *dd;

	dd = pci_get_drvdata(dev);

	if ((flgs & BA_NO_PT_UPDATE) == 0)
		ptes_restore(dev, m);
	rng_free(&dd->ba.mng, m->gpu_addr);
}

/*
 * Tear down a kernel sg mapping created by core_sg_kernel_map, unwinding
 * in reverse order of construction: gpu ptes/range first, then the bus
 * (dma) mapping, then the cpu vmap and pages, then the map object itself.
 * NOTE(review): core_sg_kernel_map links m into dd->ba.maps but no
 * list_del is done here before kfree — presumably the caller unlinks m
 * first; verify against the call sites.
 */
void core_sg_kernel_cleanup(struct pci_dev *dev, struct ba_map *m, u8 flgs)
{
	gpu_bus_unmap(dev, m, flgs);
	cpu_bus_unmap(dev, m);
	cpu_kernel_unmap(m);
	kfree(m);
}

/*
 * Allocate a gpu address range for the mapping and install ptes covering
 * every dma segment of the sg table. Returns 0 on success, -BA_ERR on
 * failure; on failure any ptes already written are restored to the dummy
 * page and the gpu range is freed.
 */
static long gpu_bus_map(struct pci_dev *dev, struct ba_map *m)
{
	struct dev_drv_data *dd;
	struct scatterlist *sg;
	int i;
	long r;
	u64 first_pte_idx;
	u64 pte_addr;
	u64 map_sz;

	dd = pci_get_drvdata(dev);

	map_sz = m->ptes_n << GPU_PAGE_SHIFT;
	r = rng_alloc_align(&m->gpu_addr, &dd->ba.mng, map_sz, GPU_PAGE_SZ);
	if (r == -ALGA_ERR)
		goto err;

	/* locate this mapping's ptes inside the page table in vram */
	first_pte_idx = GPU_PAGE_IDX(m->gpu_addr - dd->ba.mng.s);
	m->ptes_start = dd->ba.pt_start + first_pte_idx * PTE_SZ;
	pte_addr = m->ptes_start;

	/* walk the mapped (post-iommu) segments, not the original nents */
	for_each_sg(m->sg_tbl.sgl, sg, m->sg_tbl_list_nents, i) {
		dma_addr_t bus_segment_addr;
		unsigned int bus_segment_sz;

		bus_segment_addr = sg_dma_address(sg);
		bus_segment_sz = sg_dma_len(sg);

		if (!IS_GPU_PAGE_ALIGNED(bus_segment_addr)) {
			dev_err(&dev->dev,"ba:core_sg_kernel:trying to map a bus segment not aligned on a gpu page\n");
			goto err_restore_ptes;
		}

		if (!IS_GPU_PAGE_ALIGNED(bus_segment_sz)) {
			dev_err(&dev->dev,"ba:core_sg_kernel:trying to map a bus segment of size not aligned on gpu page size\n");
			goto err_restore_ptes;
		}
		/* bus_segment_map advances pte_addr past the ptes it wrote */
		pte_addr = bus_segment_map(dev, m, bus_segment_addr, bus_segment_sz,
										pte_addr);
	}
	return 0;

err_restore_ptes:
	/* (pte_addr - ptes_start) / PTE_SZ == number of ptes already written */
	ptes_restore_partial(dev, m, (pte_addr - m->ptes_start) / PTE_SZ);
	rng_free(&dd->ba.mng, m->gpu_addr);
err:
	return -BA_ERR;
}

/* just in case... */
static void cpu_ps_dummy_fill(struct pci_dev *dev, struct ba_map *m)
{
	struct dev_drv_data *dd;
	u64 __iomem *pte_addr;
	u64 ptes_n;
	u64 dummy_pte;

	dd = pci_get_drvdata(dev);

	dummy_pte = dd->ba.dummy_bus_addr | PTE_VALID | PTE_SYSTEM
				| PTE_SNOOPED | PTE_READABLE | PTE_WRITEABLE;

	pte_addr = m->cpu_addr;
	ptes_n = m->ptes_n;
	while (ptes_n--)
		*pte_addr++ = dummy_pte;
}

/*
 * Create a kernel sg mapping of sz bytes (sz must be gpu-page aligned):
 * allocate and vmap cpu pages, dma-map them, carve a gpu address range
 * and install the ptes, then link the map into dd->ba.maps.
 * On success *m owns the new mapping and 0 is returned; on failure the
 * stages already completed are unwound and -BA_ERR is returned.
 */
long core_sg_kernel_map(struct pci_dev *dev, u64 sz, struct ba_map **m)
{
	struct dev_drv_data *dd;
	long r;

	if (!IS_GPU_PAGE_ALIGNED(sz)) {
		dev_err(&dev->dev, "ba:core_sg_kernel:size not aligned on gpu page size\n");
		goto err;
	}

	*m = kzalloc(sizeof(**m), GFP_KERNEL);
	if (*m == NULL) {
		dev_err(&dev->dev, "ba:core_sg_kernel:unable to allocate memory for map\n");
		goto err;
	}

	(*m)->type = BA_MAP_KERNEL_SG;
	(*m)->ptes_n = GPU_PAGE_IDX(sz);

	dd = pci_get_drvdata(dev);

	/* stage 1: cpu pages + kernel virtual mapping */
	r = cpu_kernel_map(dev, *m);
	if (r == -BA_ERR) {
		dev_err(&dev->dev, "ba:core_sg_kernel:unable to create cpu mapping\n");
		goto err_free_map;
	}

	/* initialize the pages with dummy ptes before the gpu can see them */
	cpu_ps_dummy_fill(dev, *m);
	
	/* stage 2: sg table + dma mapping onto the bus */
	r = cpu_bus_map(dev, *m);
	if (r == -BA_ERR) {
		dev_err(&dev->dev, "ba:core_sg_kernel:unable to create cpu bus mapping\n");
		goto err_unmap_cpu;
	}

	/* stage 3: gpu address range + pte installation */
	r = gpu_bus_map(dev, *m);
	if (r == -BA_ERR) {
		dev_err(&dev->dev, "ba:core_sg_kernel:unable to create gpu bus mapping\n");
		goto err_unmap_cpu_bus;
	}

	list_add(&(*m)->n, &dd->ba.maps);

	/* ensure pte writes are visible before the gpu tlb is flushed */
	wmb();

	tlb_flush(dev);

	dev_info(&dev->dev, "ba:core_sg_kernel:aperture mapped gpu_addr=0x%016llx cpu_addr=0x%p ptes_n=0x%016llx\n",
				(*m)->gpu_addr, (*m)->cpu_addr, (*m)->ptes_n);
	return 0;

	/* unwind in reverse order of construction */
err_unmap_cpu_bus:
	cpu_bus_unmap(dev, *m);

err_unmap_cpu:
	cpu_kernel_unmap(*m);

err_free_map:
	kfree(*m);
err:
	return -BA_ERR;
}
